* same as analysis_code_revised_R2 (in version of record) but uses AHHS's publicly posted erratum data (and inserts some blank lines etc.). Additional analyses are in analysis_code_v10/11 (original submission code, posted on prior versions of repository)
* user-written packages requires: estout and boottest

cap log close
log using analysis/analysis_output/regressions_final, replace

global weather6t4  "temp6t4 press6t4 dew6t4 prcp6t4 wind6t4 skycover"
global coreweather "temp6t4 prcp6t4 skycover"

estimates clear
set emptycells drop

/**************************************************
***************************************************
	PROGRAMS
***************************************************
*/
program drop _all

program define mytable // puts out results table -- only one command but with lots of settings that would otherwise need to be repeated (and repeatedly adjusted if editing the code)
	syntax namelist using
	if strmatch(`"`using'"',"*.tex*") local tex \ // escape character for TeX output of {}
	esttab `namelist' `using', replace ///
	keep(temp6t4) varlabels(temp6t4 "Temperature (\$10^\circ\$ F)") transform(@*1000 1000) b(%4.2f) se nostar noobs ///
	stats(r2 N N_clust Ncityclusters F_joint boot_p_weather_city_month boot_p_weather_ct F_joint_core boot_p_coreweather_city_month boot_p_coreweather_ct ll_city_month ul_city_month ll_ct ul_ct, ///
		labels("\$R^2\$" "\$N\$" "City-Months" Cities "\$F\$" "bootstrap \$p\$ (city-month)" "bootstrap \$p\$ (city)" "\$F\$" "bootstrap \$p\$ (city-month)" "bootstrap \$p\$ (city)" "" "" "" "") /// may want to switch this off for debugging
		fmt(%4.2f %8.0fc %8.0gc %8.0gc %4.2f) ///
		layout(@ @ @ @ @ @ @ @ @ @ `""[@,@]""' `""`tex'{@,@`tex'}""'))
	end
	
program define reg_and_tests // performs regressions and post-estimation tests (esp. boottest) -- uses AHSS's data naming
	qui { // make everything quiet except boottest (dots!) so one knows where the code is at
		reg $decision $weather6t4 $controls i.($dummies) `0', cluster(city_month) // `0' is optional if statement and/or additional regressors. Can't use reghdfe bc of boottest
		testparm $weather6t4
		estadd scalar F_joint   = r(F)
		estadd scalar F_joint_p = r(p)
		testparm $coreweather
		estadd scalar F_joint_core   = r(F)
		estadd scalar F_joint_core_p = r(p)
		tab ct if e(sample)
		estadd scalar Ncityclusters=r(r)
		foreach cluster in city_month ct {
			noisily boottest {temp} {$weather6t4} {$coreweather}, reps(99999) nograph cluster(`cluster')
			estadd scalar boot_p_temp_`cluster' = r(p_1)
			estadd scalar boot_p_weather_`cluster' = r(p_2)
			estadd scalar boot_p_coreweather_`cluster' = r(p_3)
			mat ci = r(CI_1)
			estadd scalar ll_`cluster' = ci[1,1]*1000
			estadd scalar ul_`cluster' = ci[1,2]*1000
			}
		_eststo
		}
	end
	
program define test_environmental // tests if envir'l variables (a) are significant, (b) create omitted variable bias if omitted. Only argument: comma-separated distances string
	qui: eststo full: reg $decision $weather6t4 $pollutants i.($dummies) if max(`1')<20*1.60934
	qui: eststo rest: reg $decision $weather6t4				i.($dummies) if _est_full
	qui: suest full rest, vce(cluster city_month)
	test [full_mean]: $pollutants // (a)
	test [full_mean=rest_mean]: $weather6t4 // (b)
	estimates drop full rest
	end
	
program define missingdummies // creates missing dummies for a varlist (the only, required argument)
	global mid ""
	foreach var of varlist `0' {
		count if `var'==.
		assert r(N)>0
		gen byte mid_`var'=mi(`var')
		recode `var' (.=0)
		global mid "$mid mid_`var'"
		}
	end
	
program define loadandprep // loads and prepares AHSS's final data (matched.dta), or data reconstructed from their raw data and using their variable naming. Argument: filepath/name
	use res city chair c_asy_type nat_name $weather6t4 $pollutants d* using `"`0'"', clear // d* gets date and any distance variable
	cap g int  year=year(date) // in case year is already in the data (in my improved data it is)
	g byte month=month(date)
	g byte dayofweek=dow(date)
	encode city, g(ct)
	egen int city_month = group(ct month)
	encode c_asy_type, g(type)
	encode nat_name, g(nati)
	end
	
program define dataassembly // prepares my data, incl. USSC: creates joined dataset of decision and weather as function of chosen location (base, hearings, district_court [USSC]) and decision date (Asylum: hearing, comp_date, or combination; USSC: sentdate)
	local noaaweather "temp_noaa_6a4p pressure_6a4p dew_noaa_6a4p prec_noaa_6a4p wind_speed_noaa_6a4p sky_noaa_6a4p"
	use location_code date `noaaweather' $pollutants using "data/weather/NOAA_AQS.dta" if !mi(temp_noaa_6a4p) & substr(location_code,5,.)=="`1'", clear
	replace location_code = substr(location_code,1,3)
	rename (`noaaweather') ($weather6t4) // this allows using the same reg_and_tests code for AHSS and my data
	tempfile weather
	save `weather'
	
	if inlist("`1'","base","hearings") { // asylum data
		if "`1'"=="base" local location base_city_B
		else local location hearing_loc_B
		use `location' comp_date latest_hearing grant judge defensive nationality using "data/EOIR_asylum/asylum_revised.dta" if !mi(comp_date), clear // cf. asylum_data_cleaning_and_checking.do: already limited to years 1990-
		gen grant_AHSS = grant==1 // this fills in zeroes for completions other than denials
		decode `location', gen(location_code)
		gen int year = year(comp_date)
		gen int fyear = year + inrange(month(comp_date),10,12) // fiscal year
		}
	else if "`1'"=="district_court" { // sentencing data
		tempfile ussc
		copy "https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/TZRNKD/IZ0GTW" `ussc' // from Spamann PsySci 2018
		use distr sentdate ussc_fy logsent prison logsent1 csenttt0 offtype2 crimhist offensl cstatmin noconvic trial mrace hispanic newcit gender neweduc offage offage2 dependents ///
			using `ussc' if offensl<44, clear // Max.(offensl) in sentencing grid is 43 (~600 obs. >43, + 39k missing)
		merge m:1 distr using "data/USSC/courts_crosswalk.dta", assert(2 3) keep(3) nogenerate
		gen statminimum = cstatmin>0 & cstatmin<.
		rename ussc_fy fyear 
		}
	
	gen int date = `2' 
	if inlist("`1'","base","hearings") drop comp_date latest_hearing `location'
	else if "`1'"=="district_court" drop sentdate distr cstatmin
	merge m:1 location_code date using `weather', nogen keep(1 3)
	
	gen byte month = month(date)
	gen byte dow = dow(date)
	encode location_code, gen(ct)
	egen city_month = group(ct month)
	drop location_code month
	compress
	
	end

	
/*********************************************
**********************************************
	ASYLUM
**********************************************
*********************************************/

/************************************
	using THEIR data
*************************************/
* for ease of comparison, the names of variables (in this part) and globals (everywhere) are the same as in AHSS's regression.do; however, the commands are streamlined
global dummies "dayofweek nati type year ct#month chair"
global pollutants "ozone co pm25"
global controls "$pollutants" // different from $pollutants because it MAY include other variables (and will, below)
global decision "res" // defining this allows using the same simple program reg_and_tests for both their and my data

* their original, final data
loadandprep data/original_article/data/Data/final/matched.dta
reg_and_tests
reg_and_tests if max(distance,dsky)<20*1.60934
reg_and_tests if max(dtemp,dwind,dpress,dsky)<20*1.60934
	/* notes on distance conditioning:
		a) miles vs. km: their organize.do uses geodist without miles option, so measures should be in km
		b) which distance variable to use:
			i) I prefer "distance" because it presumably* refers to hourly measures, which are those that count
				* "distance" was in monthly files of hourly data before hourlyweatherclean.do dropped it (line 27)
			ii) the others refer to right weather aspects but probably for daily measures and incomplete (missing precipitation and dew point, unless want to use drh instead)
		c) condition "" is their specification -- should give same result */
test_environmental "distance,dsky"

* their erratum data
use data/original_article/data_Errata/Data/final/matched.dta, clear
rename pm pm25 // change to naming convention in AHSS's original data
tempfile Errata_data // this and save/load cycle on next two lines are an unnecessary I/O operation but allow use of loadandprep without alteration
save `Errata_data'
loadandprep `Errata_data'
reg_and_tests
reg_and_tests if nati!="CHINA":nati // 5 more observations than in published version of my comment because Heyes & Saberian's final erratum data differ slightly from what they had given me during the editorial back-and-forth

*** my reconstruction of their data from their input files

loadandprep data/original_article/reconstructed_data/matched_HS_improved_corrections.dta // GMT adjustment and city locations fixed plus smaller stuff
reg_and_tests
reg_and_tests if max(dhourly,dsky)<20*1.60934 // weather measurement from 20 mile radius
reg_and_tests if max(dhourly,dsky,dco,dpm,dozone)<20*1.60934 // also restricting environmental measurements to <=20 miles
foreach distances in "dhourly,dsky" "dhourly,dsky,dco,dpm,dozone" {
	test_environmental "`distances'"
}

* now adding some improvements (still proceeding from their raw data):
* 1. stable weather stations & crutch for missing 2001 CO data
preserve // getting the stable weather stations for each city-date
	use city date $weather6t4 $pollutants using "data/original_article/reconstructed_data/matched_HS_improved_corrections_stablestations", clear
	contract *
	tempfile stablemeasurements
	save `stablemeasurements'
restore
drop $weather6t4 // makes space for the stable measurements. merge w/ update replace does not work because it doesn't set to missing when var is missing in using data
merge m:1 city date using `stablemeasurements', assert(1 3) keep(3) keepusing($weather6t4) nogen // swaps out shortest-distance weather measurement against stable station ~
assert mi(co) if year==2001 // this and next two lines: replace missing CO in 2001 with CO mean
qui sum co, meanonly
replace co = r(mean) if year==2001
reg_and_tests
* 2. + stable pollutant measurement stations, without and with addressing missingness through missing dummies
drop $pollutants
merge m:1 city date using `stablemeasurements', assert(3) keepusing($pollutants) nogen // swaps out shortest-distance pollution measurement against stable station ~
reg_and_tests
missingdummies $pollutants
reg_and_tests $mid


/************************************
	using MY data
*************************************/
global pollutants "pm25 ozone carbon_monoxide" // notice different names from AHSS data
global controls "$pollutants"
global dummies "judge defensive nationality dow year city_month \$mid" // the backslash prevents macro expansion of $mid until dummies is used in reg_and_tests

*** Regressions ***
* using completion date
dataassembly base comp_date
missingdummies $pollutants

global decision "grant_AHSS" // this compares grants to all other completions
reg_and_tests if inrange(fyear,2000,2004) & inrange(year,2000,2004) // AHSS sample period
reg_and_tests

drop if mi(grant)
global decision "grant" // this compares grants only to denials
reg_and_tests if inrange(fyear,2000,2004) & inrange(year,2000,2004)
reg_and_tests
global dummies "`: subinstr global dummies "year" ""' fyear" // substitute fiscal for calendar year
reg_and_tests

* using latest hearing
dataassembly base "latest_hearing if latest_hearing<=comp_date"
missingdummies $pollutants
reg_and_tests // slight deviation of this result from original AEJ submission is from changes in NOAA download -- see note_on_minor_discrepancies_between_versions.docx

* R2 addition: what if one drops China, as in AHSS's erratum? (cf. analysis of AHSS erratum data above)
reg_and_tests if inrange(fyear,2000,2004) & inrange(year,2000,2004) & nati!="CH":nationality
reg_and_tests if nati!="CH":nationality
reg_and_tests if !(nati=="CH":nationality & fyear>=1997 & date<date("20050511","YMD")) // CPC was enacted 1996/9/30 (PL 104-208) and the conditionality ended 2005/5/11 (PL 109-13)

/*************************************
	output
*************************************/
qui estimates dir
mytable `r(names)' using "analysis/analysis_output/final_asylum_regressions.html"
mytable est1 est9 est17 est18 est20 est6 est19 est21 est23 est25 using "analysis/analysis_output/final_table1.tex" // referencing by number is suboptimal ... -- careful when adding eststo commands (which both reg_and_tests and test_environmental do)!

* saving in case I need them again:
qui estimates dir
foreach e in `r(names)' {
	qui estimates restore `e'
	estimates save "analysis/analysis_output/`e'", replace // recover with foreach e in [namelist] { estimates use "analysis/analysis_output/`e'"; eststo `e' }
}


/*********************************************
**********************************************
	SENTENCING
**********************************************
*********************************************/
estimates clear // otherwise the numbering gets messy
global dummies "offtype2 crimhist##offensl statminimum noconvic trial mrace hispanic newcit gender neweduc \$mid dow fyear city_month" // fixed effects
global pollutants "pm25 ozone carbon_monoxide" 
global controls "$pollutants offage offage2 dependents" // continuous regressors
dataassembly district_court sentdate
missingdummies $pollutants

foreach depvar in prison logsent logsent1 csenttt0 { // logsent1 is ln(1+sentence). All sent vars are top-censored at 470 months punishment (life)
	global decision `depvar'
	reg_and_tests
}

qui estimates dir
mytable `r(names)' using "analysis/analysis_output/RR_USSC_regressions.html"
mytable est1 est2 using "analysis/analysis_output/RR_table2.tex"

qui estimates dir
foreach e in `r(names)' {
	qui estimates restore `e'
	estimates save "analysis/analysis_output/USSC_`e'", replace // recover with forvalues e=1/X { estimates use "analysis/analysis_output/USSC_est`e'"; eststo }
}

log close